We started by creating a data set of larval traits for Diptera from the literature, with records by genus.

Next, we used TaxReformer to update these genus names based on Open Tree Taxonomy and search them on NCBI.

We then used phylotaR to download othologous loci for phylogenetic inference. Finally, we will process phylotaR output to retrieve an alignment.

library(phylotaR)
library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
── Attaching packages ────────────────────────────────── tidyverse 1.3.2 ──✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 1.0.0── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

Downloading data

Now let’s load the dataframe updated by TaxReformer and pull taxonomic IDs.

ncbi_ids = read_csv('updated_cuticle_papers_taxa.csv') %>%
  pull(`NCBI ID`) %>%
  na.exclude() %>%
  as.numeric()
Rows: 716 Columns: 12── Column specification ───────────────────────────────────────────────────
Delimiter: ","
chr (9): family, genus, Habitat, habitat_cat1, habitat_cat2, source, sp...
dbl (2): Open Tree Taxonomy ID, NCBI ID
lgl (1): Family matches
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(ncbi_ids)
[1]  462271  210245  210239  210230 1932988 1932988

Now let’s setup phylotaR. Commented out parts have been done using an Rscript in a batch job since they fail in Rstudio.

workdir = './phylotar/'

dir.create(workdir, showWarnings = F)
blast_path ='/opt/homebrew/Cellar/blast/2.13.0/bin'
#setup(wd = workdir,
#      txid = ncbi_ids,
#      ncbi_dr = blast_path,
#      v = F,
#      ncps = 4
#      )

And let’s run

#run(workdir)
#restart(workdir)

Extracting data from phylotaR

Now that the pipeline is completed, we can load the phylotar object to inspect the results. Commented out parts have been done using an Rscript in a batch job since they fail in Rstudio. First, let’s keep the 3 longest sequences for each genus.

#phylo = read_phylota(workdir)
#phylo_genus = drop_by_rank(phylo,rnk = 'genus', keep_higher = FALSE, n = 3, choose_by = c('nncltds','pambgs','age'), greatest = c(T,F,F))

Now let’s find which clusters had more than 20 genera

#clusters_over_20 = names(which(get_ntaxa(phylo_genus,cid = phylo_genus@cids, rnk='genus',keep_higher=FALSE) > 20))
#clusters_over_20 

This was the output:

> clusters_over_20 
 [1] "0"    "9"    "10"   "12"   "13"   "16"   "63"   "129"  "137"  "138" 
[11] "141"  "176"  "177"  "203"  "204"  "409"  "504"  "1065" "3194" "3432"

Now let’s retain only these clusters and save the phylotaR object. It was still impossible to read this object in Rstudio, so we will keep commenting out.

#phylo_genus_20 = drop_clstrs(phylo_genus,clusters_over_20)
#save(phylo_genus_20, file='phylo_genus.RData')
#load('phylo_genus.RData')

Now let’s see which are the terms associated with each cluster:

#calc_wrdfrq(phylo_genus_20, cid = phylo_genus_20@cids,min_frq=0.01) %>% purrr::map_dfr(.id='CID', .f = ~broom::tidy(.x)) %>% write_csv(file = 'tables/cluster_terms.csv')

read_csv('tables/cluster_terms.csv')
Rows: 241 Columns: 3── Column specification ───────────────────────────────────────────────────
Delimiter: ","
chr (1): wrds
dbl (2): CID, n
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

These seem to be the gene identities:

gene_identities = tibble(CID = c(0,9,10,12,13,16,63,129,137,138,141,176,177,203,204,409,504,1065,3194,3432),
                         gene = c("COI",
"28S",
"COI",
"12S+16S",
"COI",
"COI",
"NADH",
"COI",
"COI",
"EF1a",
"18S",
"CAD",
"CAD",
"28S",
"28S",
"AATS",
"TPI",
"PGD",
"MAC",
"MCS"))
gene_identities

So this means that some clusters correspond to the same gene: maybe they are just different regions. Let’s extract sequence information for all clusters and align the same genes together, and we can filter them out later based on the overlap with the rest of the dataset.

Let’s now extract all sequences from the phylotaR object into a table so we can write fasta files.

Let’s start by listing sequence accessions and taxonomic IDs for each cluster

#cluster_seqs = phylo_genus_20@clstrs@clstrs %>% purrr::map_dfr(.id = 'CID', .f = ~tibble(ncbi_accession = .x@sids))
#write_csv(cluster_seqs,file='tables/cluster_seqs.csv')

Now let’s make another table with sequences and their associated information

#seq_info = phylo_genus_20@sqs@sqs %>% purrr::map_df(.f = ~tibble(ncbi_accession = .x@id, ncbi_taxid = .x@txid, ncbi_organism = .x@orgnsm, seq_name = .x@dfln, seq = rawToChar(.x@sq)))
#write_csv(seq_info,file='tables/seq_info.csv')

Finally, let’s make a table with taxonomic information

# tax_info = seq_info %>% 
#   select(ncbi_taxid) %>% 
#   distinct() %>% 
#   mutate(ncbi_tax_name = get_tx_slot(phylo_genus_20, 
#                                        txid = ncbi_taxid, 
#                                        slt_nm = 'scnm'),
#          ncbi_genus_id = get_txids(phylo_genus_20, 
#                                    txids = ncbi_taxid, 
#                                    rnk = 'genus'),
#          ncbi_genus_name = get_tx_slot(phylo_genus_20, 
#                                        txid = ncbi_genus_id, 
#                                        slt_nm = 'scnm'),
#          ncbi_family_id = get_txids(phylo_genus_20, 
#                                    txids = ncbi_taxid, 
#                                    rnk = 'family'),
#          ncbi_family_name = get_tx_slot(phylo_genus_20, 
#                                        txid = ncbi_family_id, 
#                                        slt_nm = 'scnm'),
#          )
# write_csv(tax_info,file='tables/tax_info.csv')

Checking dataset

Now we have everything out of phylotaR. We can load tables and work in Rstudio.

cluster_seqs = read_csv(file='tables/cluster_seqs.csv')
Rows: 4502 Columns: 2── Column specification ───────────────────────────────────────────────────
Delimiter: ","
chr (1): ncbi_accession
dbl (1): CID
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cluster_seqs
seq_info = read_csv(file='tables/seq_info.csv')
Rows: 4055 Columns: 5── Column specification ───────────────────────────────────────────────────
Delimiter: ","
chr (4): ncbi_accession, ncbi_organism, seq_name, seq
dbl (1): ncbi_taxid
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
seq_info 
tax_info = read_csv(file='tables/tax_info.csv')
Rows: 1955 Columns: 6── Column specification ───────────────────────────────────────────────────
Delimiter: ","
chr (3): ncbi_tax_name, ncbi_genus_name, ncbi_family_name
dbl (3): ncbi_taxid, ncbi_genus_id, ncbi_family_id
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tax_info 

Now let’s pull our table with samples for which we have phenotypes, so we can combine everything:

phenotype_table = read_csv('updated_cuticle_papers_taxa.csv')
Rows: 716 Columns: 12── Column specification ───────────────────────────────────────────────────
Delimiter: ","
chr (9): family, genus, Habitat, habitat_cat1, habitat_cat2, source, sp...
dbl (2): Open Tree Taxonomy ID, NCBI ID
lgl (1): Family matches
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
phenotype_table

How many genera are represented by at least one sequence for each gene?

First, let’s create the table that I need for this. We will first summarize the data that we got with phylotaR and then add the genera missing from phylotaR:

occ_matrix = cluster_seqs %>% 
  left_join(gene_identities) %>% 
  left_join(tax_info) %>%
  select(ncbi_genus_id,ncbi_genus_name,ncbi_family_name,gene) %>%
  distinct() %>%
  mutate(ncbi_genus_name = factor(ncbi_genus_name, ordered = T),
         gene = fct_infreq(gene, ordered = T),
         sequenced = T
         ) 
Joining with `by = join_by(CID)`Joining with `by = join_by(ncbi_taxid)`
occ_matrix = expand(occ_matrix, ncbi_genus_name, gene) %>%
  left_join(select(occ_matrix,ncbi_genus_name, gene, sequenced))
Joining with `by = join_by(ncbi_genus_name, gene)`
missing_genera = phenotype_table %>%
  select(ncbi_genus_name = updated_genus) %>%
  filter(!(ncbi_genus_name %in% occ_matrix$ncbi_genus_name)) %>%
  na.exclude() %>%
  pull(ncbi_genus_name) %>%
  unique()
  
occ_matrix = expand.grid(ncbi_genus_name = missing_genera, 
                         gene = unique(occ_matrix$gene)) %>%
  mutate(sequenced = F) %>%
  bind_rows(mutate(occ_matrix, ncbi_genus_name = as.character(ncbi_genus_name))) %>%
   mutate(sequenced = replace_na(sequenced, FALSE),
         ncbi_genus_name = fct_reorder(ncbi_genus_name,sequenced,.fun = sum,.desc = F)) 

occ_matrix
NA

Now let’s plot

p1 = ggplot(occ_matrix) +
  geom_tile(aes(x=gene,y=ncbi_genus_name,fill=sequenced)) +
  scale_fill_brewer(type = 'qual',palette = 'Spectral') +
  theme(axis.text.y = element_text(size = 3),
        axis.text.x = element_text(hjust = 1,angle = 30))
p1

p2 = occ_matrix %>%
  group_by(ncbi_genus_name) %>%
  summarize(sequenced = mean(sequenced)) %>%
  ggplot() +
  geom_col(aes(x=sequenced,y=ncbi_genus_name)) +
  scale_x_continuous(labels =  scales::percent, limits = c(0,1)) +
  ggthemes::theme_tufte() +
  theme(axis.text.y = element_blank(),
        axis.title.y = element_blank(),
        axis.line.y = element_blank(),
        axis.ticks.y = element_blank()) 
p2

p3 = occ_matrix %>%
  group_by(gene) %>%
  summarize(sequenced = mean(sequenced)) %>%
  ggplot() +
  geom_col(aes(x=gene,y=sequenced)) +
  scale_y_continuous(labels =  scales::percent, limits = c(0,1)) +
  ggthemes::theme_tufte() +
  theme(axis.text.x = element_blank(),
        axis.title.x = element_blank(),
        axis.line.x = element_blank(),
        axis.ticks.x = element_blank())
p3

Now let’s put everything together.

p = cowplot::insert_xaxis_grob(p1,p3)
p = cowplot::insert_yaxis_grob(p,p2)
p = cowplot::ggdraw(p)
ggsave(plot = p,filename = 'gene_occupancy.pdf',device = 'pdf',path = 'plots',height = 25)
Saving 7 x 25 in image

What proportion of genera have at least one sequence for the 7 genes with better coverage?

best_genes = occ_matrix %>%
  group_by(gene) %>%
  summarize(N=sum(sequenced))

best_genes 

genes_to_keep = best_genes$gene[1:7]

occ_matrix %>%
  filter(gene %in% genes_to_keep) %>%
  group_by(ncbi_genus_name) %>%
  summarize(any_sequenced = any(sequenced)) %>%
  group_by(any_sequenced) %>%
  summarize(N = n())

List genera without data.

occ_matrix %>%
  filter(gene %in% genes_to_keep) %>%
  group_by(ncbi_genus_name) %>%
  summarize(any_sequenced = any(sequenced)) %>%
  filter(!any_sequenced) %>%
  pull(ncbi_genus_name)
  [1] Acemya            Acklandia         Acridomyia       
  [4] Acyglossa         Afrothaumalea     Albuquerquea     
  [7] Amalopteryx       Anastoechus       Anatalanta       
 [10] Anthomyiopsis     Apatolestes       Apetaenus        
 [13] Aphidoletes       Apistomyia        Araucoderus      
 [16] Archicollinella   Aulacephala       Aulacigaster     
 [19] Auster            Australosymmerus  Batrachomyia     
 [22] Bibiocephala      Boettcherisca     Bolbodimyia      
 [25] Booponus          Brachyopa         Brittenia        
 [28] Cairnsimyia       Calobatina        Calycopteryx     
 [31] Calythea          Camptochironomus  Canace           
 [34] Canacea           Canaceoides       Cephalopina      
 [37] Cerajocera        Cerodontha        Ceroplatus       
 [40] Chiastocheta      Chrysogaster      Cistudinomyia    
 [43] Cleonice          Cobboldia         Copestylum       
 [46] Cylindrotoma      Cypselosoma       Dictenidia       
 [49] Dictya            Dimecoenia        Diogma           
 [52] Discomyza         Earomyia          Elomyia          
 [55] Epicypta          Epiplastocerus    Episthetosoma    
 [58] Erax              Eriozona          Estheria         
 [61] Eucorethra        Eulimnia          Eumetopiella     
 [64] Eupachygaster     Euryomma          Eutropha         
 [67] Gedoelstia        Glaurocara        Gymnoptera       
 [70] Hammerschmidtia   Hapalothrix       Hecamede         
 [73] Helius            Helosciomyza      Hemerodromia     
 [76] Heteronychia      Heterostylum      Hippelates       
 [79] Hirtea            Hybomitra         Hyperalonia      
 [82] Japanagromyza     Javanoxenia       Kirkioestrus     
 [85] Laphria           Lasiomma          Lepidanthrax     
 [88] Leptometopa       Leptopsilopa      Leucotabanus     
 [91] Lioscinella       Lipara            Lipsothrix       
 [94] Lispoides         Litoleptis        Lochmostylia     
 [97] Mallota           Meiosimyza        Metacemyia       
[100] Metasyrphus       Milada            Misotermes       
[103] Morellia          Mycophila         Myospila         
[106] Napomyza          Neoascia          Neoceroplatus    
[109] Neocuterebra      Neopachygaster    Neottiophilum    
[112] Nepenthomyia      Nephrotoma        Niphta           
[115] Nothodixa         Ochotonia         Odinia           
[118] Odontoloxozus     Oestroderma       Onesiomima       
[121] Ornidia           Ornithophila      Pachylophus      
[124] Palaeodipteron    Pallasiomyia      Pandasyopthalmus 
[127] Parallelomma      Parasarcophaga    Parepidosis      
[130] Pavlovskiata      Pegohylemyia      Phaonina         
[133] Pharyngobolus     Philorus          Phorodonta       
[136] Phytagromyza      Placopsidella     Plastocerontus   
[139] Platycephala      Polytocus         Portschinskia    
[142] Prionocera        Procecidochares   Prosthetosoma    
[145] Pseudiastata      Pseudohypocera    Pseudoperichaeta 
[148] Pteromicra        Rhabdochaeta      Rhexoza          
[151] Rhizomyia         Rhypholophus      Ropalomera       
[154] Ruttenia          Scholastes        Shannonia        
[157] Sobarocephala     Souzalopesiella   Spania           
[160] Stenomicra        Strobiloestrus    Symbiocladius    
[163] Syndiamesa        Syrphus           Systoechus       
[166] Tanyptera         Tapeigaster       Termitocalliphora
[169] Termitostroma     Tetraplastocerus  Therobia         
[172] Tracheomyia       Trichonta         Trichopsidea     
[175] Trichopteromyia   Triogma           Villeneuviella   
[178] Xanthogramma      Zabrachia        
638 Levels: Acemya Acklandia Acridomyia Acyglossa ... Exorista

Export sequences for alignment

First, let’s create lists with sequence information. For some reason, the taxonomic information in table cluster_seqs is incorrect, so let’s remove it.

gene_lists = select(cluster_seqs,-ncbi_taxid) %>%
  left_join(gene_identities) %>%
  filter(gene %in% genes_to_keep) %>%
  left_join(seq_info) %>%
  left_join(tax_info) %>%
  mutate(export_seqname = str_c(str_c(ncbi_genus_name,'_',ncbi_genus_id),
                         str_c(str_c('accession:',ncbi_accession),
                               str_c('ncbi_family_name:',ncbi_family_name),
                               str_c('ncbi_family_id:',ncbi_family_id),
                               str_c('phylotaR_cluster:',CID),
                               str_c('ncbi_seqname:',seq_name),
                               sep=';'),
                         sep = '|')
         ) %>%
  select(gene,seq,export_seqname) %>%
  filter(!is.na(seq)) %>%
  mutate(seqrecord=str_c(str_c('>',export_seqname),
                         seq,sep='\n'))
Joining with `by = join_by(CID)`Joining with `by = join_by(ncbi_accession)`Joining with `by = join_by(ncbi_taxid)`
gene_lists = split(gene_lists, gene_lists$gene)

gene_lists
$`12S+16S`

$`18S`

$`28S`

$AATS

$CAD

$COI

$EF1a
NA

Now let’s create fasta files

dir.create('exported_fasta')
Warning: 'exported_fasta' already exists
purrr::walk(gene_lists, .f = ~str_c(.x$seqrecord,collapse='\n') %>% write(file=file.path('exported_fasta',str_c(unique(.x$gene),'.fasta'))))
LS0tCnRpdGxlOiAiT2J0YWluaW5nIGEgcGh5bG9nZW5ldGljIGRhdGFzZXQgZm9yIHBoeWxvZ2VueSBvZiBsYXJ2YWwgRGlwdGVyYSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKV2Ugc3RhcnRlZCBieSBjcmVhdGluZyBhIGRhdGEgc2V0IG9mIGxhcnZhbCB0cmFpdHMgZm9yIERpcHRlcmEgZnJvbSB0aGUgbGl0ZXJhdHVyZSwgd2l0aCByZWNvcmRzIGJ5IGdlbnVzLiAKCk5leHQsIHdlIHVzZWQgW1RheFJlZm9ybWVyXShodHRwczovL2dpdGh1Yi5jb20vYnJ1bm9hc20vVGF4UmVmb3JtZXIpIHRvIHVwZGF0ZSB0aGVzZSBnZW51cyBuYW1lcyBiYXNlZCBvbiBPcGVuIFRyZWUgVGF4b25vbXkgYW5kIHNlYXJjaCB0aGVtIG9uIE5DQkkuCgpXZSB0aGVuIHVzZWQgW3BoeWxvdGFSXShodHRwczovL2RvY3Mucm9wZW5zY2kub3JnL3BoeWxvdGFSLykgdG8gZG93bmxvYWQgb3Rob2xvZ291cyBsb2NpIGZvciBwaHlsb2dlbmV0aWMgaW5mZXJlbmNlLiBGaW5hbGx5LCB3ZSB3aWxsIHByb2Nlc3MgcGh5bG90YVIgb3V0cHV0IHRvIHJldHJpZXZlIGFuIGFsaWdubWVudC4KYGBge3J9CmxpYnJhcnkocGh5bG90YVIpCmxpYnJhcnkodGlkeXZlcnNlKQpgYGAKIyBEb3dubG9hZGluZyBkYXRhCgpOb3cgbGV0J3MgbG9hZCB0aGUgZGF0YWZyYW1lIHVwZGF0ZWQgYnkgVGF4UmVmb3JtZXIgYW5kIHB1bGwgdGF4b25vbWljIElEcy4KYGBge3J9Cm5jYmlfaWRzID0gcmVhZF9jc3YoJ3VwZGF0ZWRfY3V0aWNsZV9wYXBlcnNfdGF4YS5jc3YnKSAlPiUKICBwdWxsKGBOQ0JJIElEYCkgJT4lCiAgbmEuZXhjbHVkZSgpICU+JQogIGFzLm51bWVyaWMoKQoKaGVhZChuY2JpX2lkcykKYGBgCgpOb3cgbGV0J3Mgc2V0dXAgcGh5bG90YVIuIENvbW1lbnRlZCBvdXQgcGFydHMgaGF2ZSBiZWVuIGRvbmUgdXNpbmcgYW4gUnNjcmlwdCBpbiBhIGJhdGNoIGpvYiBzaW5jZSB0aGV5IGZhaWwgaW4gUnN0dWRpby4KYGBge3J9CndvcmtkaXIgPSAnLi9waHlsb3Rhci8nCgpkaXIuY3JlYXRlKHdvcmtkaXIsIHNob3dXYXJuaW5ncyA9IEYpCmJsYXN0X3BhdGggPScvb3B0L2hvbWVicmV3L0NlbGxhci9ibGFzdC8yLjEzLjAvYmluJwojc2V0dXAod2QgPSB3b3JrZGlyLAojICAgICAgdHhpZCA9IG5jYmlfaWRzLAojICAgICAgbmNiaV9kciA9IGJsYXN0X3BhdGgsCiMgICAgICB2ID0gRiwKIyAgICAgIG5jcHMgPSA0CiMgICAgICApCmBgYAoKQW5kIGxldCdzIHJ1bgpgYGB7cn0KI3J1bih3b3JrZGlyKQojcmVzdGFydCh3b3JrZGlyKQpgYGAKCiMgRXh0cmFjdGluZyBkYXRhIGZyb20gcGh5bG90YVIKCk5vdyB0aGF0IHRoZSBwaXBlbGluZSBpcyBjb21wbGV0ZWQsIHdlIGNhbiBsb2FkIHRoZSBwaHlsb3RhciBvYmplY3QgdG8gaW5zcGVjdCB0aGUgcmVzdWx0cy4gQ29tbWVudGVkIG91dCBwYXJ0cyBoYXZlIGJlZW4gZG9uZSB1c2luZyBhbiBSc2NyaXB0IGluIGEgYmF0Y2ggam9iIHNpbmNlIHRoZXkgZmFpbCBpbiBSc3R1ZGlvLiBGaXJzdCwgbGV0J3Mga2VlcCB0aGUgMyBsb25nZXN0IHNlcXVlbmNlcyBmb3IgZWFjaCBnZW51cy4KCmBgYHtyfQojcGh5bG8gPSByZWFkX3BoeWxvdGEod29ya2RpcikKI3BoeWxvX2dlbnVzID0gZHJvcF9ieV9yYW5rKHBoeWxvLHJuayA9ICdnZW51cycsIGtlZXBfaGlnaGVyID0gRkFMU0UsIG4gPSAzLCBjaG9vc2VfYnkgPSBjKCdubmNsdGRzJywncGFtYmdzJywnYWdlJyksIGdyZWF0ZXN0ID0gYyhULEYsRikpCmBgYAoKTm93IGxldCdzIGZpbmQgd2hpY2ggY2x1c3RlcnMgaGFkIG1vcmUgdGhhbiAyMCBnZW5lcmEKYGBge3J9CiNjbHVzdGVyc19vdmVyXzIwID0gbmFtZXMod2hpY2goZ2V0X250YXhhKHBoeWxvX2dlbnVzLGNpZCA9IHBoeWxvX2dlbnVzQGNpZHMsIHJuaz0nZ2VudXMnLGtlZXBfaGlnaGVyPUZBTFNFKSA+IDIwKSkKI2NsdXN0ZXJzX292ZXJfMjAgCmBgYApUaGlzIHdhcyB0aGUgb3V0cHV0OgpgYGAKPiBjbHVzdGVyc19vdmVyXzIwIAogWzFdICIwIiAgICAiOSIgICAgIjEwIiAgICIxMiIgICAiMTMiICAgIjE2IiAgICI2MyIgICAiMTI5IiAgIjEzNyIgICIxMzgiIApbMTFdICIxNDEiICAiMTc2IiAgIjE3NyIgICIyMDMiICAiMjA0IiAgIjQwOSIgICI1MDQiICAiMTA2NSIgIjMxOTQiICIzNDMyIgpgYGAKCk5vdyBsZXQncyByZXRhaW4gb25seSB0aGVzZSBjbHVzdGVycyBhbmQgc2F2ZSB0aGUgcGh5bG90YVIgb2JqZWN0LiBJdCB3YXMgc3RpbGwgaW1wb3NzaWJsZSB0byByZWFkIHRoaXMgb2JqZWN0IGluIFJzdHVkaW8sIHNvIHdlIHdpbGwga2VlcCBjb21tZW50aW5nIG91dC4KCmBgYHtyfQojcGh5bG9fZ2VudXNfMjAgPSBkcm9wX2Nsc3RycyhwaHlsb19nZW51cyxjbHVzdGVyc19vdmVyXzIwKQojc2F2ZShwaHlsb19nZW51c18yMCwgZmlsZT0ncGh5bG9fZ2VudXMuUkRhdGEnKQojbG9hZCgncGh5bG9fZ2VudXMuUkRhdGEnKQpgYGAKCgpOb3cgbGV0J3Mgc2VlIHdoaWNoIGFyZSB0aGUgdGVybXMgYXNzb2NpYXRlZCB3aXRoIGVhY2ggY2x1c3RlcjoKYGBge3J9CiNjYWxjX3dyZGZycShwaHlsb19nZW51c18yMCwgY2lkID0gcGh5bG9fZ2VudXNfMjBAY2lkcyxtaW5fZnJxPTAuMDEpICU+JSBwdXJycjo6bWFwX2RmciguaWQ9J0NJRCcsIC5mID0gfmJyb29tOjp0aWR5KC54KSkgJT4lIHdyaXRlX2NzdihmaWxlID0gJ3RhYmxlcy9jbHVzdGVyX3Rlcm1zLmNzdicpCgpyZWFkX2NzdigndGFibGVzL2NsdXN0ZXJfdGVybXMuY3N2JykKCmBgYAoKVGhlc2Ugc2VlbSB0byBiZSB0aGUgZ2VuZSBpZGVudGl0aWVzOgoKYGBge3J9CmdlbmVfaWRlbnRpdGllcyA9IHRpYmJsZShDSUQgPSBjKDAsOSwxMCwxMiwxMywxNiw2MywxMjksMTM3LDEzOCwxNDEsMTc2LDE3NywyMDMsMjA0LDQwOSw1MDQsMTA2NSwzMTk0LDM0MzIpLAogICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZSA9IGMoIkNPSSIsCiIyOFMiLAoiQ09JIiwKIjEyUysxNlMiLAoiQ09JIiwKIkNPSSIsCiJOQURIIiwKIkNPSSIsCiJDT0kiLAoiRUYxYSIsCiIxOFMiLAoiQ0FEIiwKIkNBRCIsCiIyOFMiLAoiMjhTIiwKIkFBVFMiLAoiVFBJIiwKIlBHRCIsCiJNQUMiLAoiTUNTIikpCmdlbmVfaWRlbnRpdGllcwpgYGAKClNvIHRoaXMgbWVhbnMgdGhhdCBzb21lIGNsdXN0ZXJzIGNvcnJlc3BvbmQgdG8gdGhlIHNhbWUgZ2VuZTogbWF5YmUgdGhleSBhcmUganVzdCBkaWZmZXJlbnQgcmVnaW9ucy4gTGV0J3MgZXh0cmFjdCBzZXF1ZW5jZSBpbmZvcm1hdGlvbiBmb3IgYWxsIGNsdXN0ZXJzIGFuZCBhbGlnbiB0aGUgc2FtZSBnZW5lcyB0b2dldGhlciwgYW5kIHdlIGNhbiBmaWx0ZXIgdGhlbSBvdXQgbGF0ZXIgYmFzZWQgb24gdGhlIG92ZXJsYXAgd2l0aCB0aGUgcmVzdCBvZiB0aGUgZGF0YXNldC4KCkxldCdzIG5vdyBleHRyYWN0IGFsbCBzZXF1ZW5jZXMgZnJvbSB0aGUgcGh5bG90YVIgb2JqZWN0IGludG8gYSB0YWJsZSBzbyB3ZSBjYW4gd3JpdGUgZmFzdGEgZmlsZXMuCgpMZXQncyBzdGFydCBieSBsaXN0aW5nIHNlcXVlbmNlIGFjY2Vzc2lvbnMgYW5kIHRheG9ub21pYyBJRHMgZm9yIGVhY2ggY2x1c3RlcgpgYGB7cn0KI2NsdXN0ZXJfc2VxcyA9IHBoeWxvX2dlbnVzXzIwQGNsc3Ryc0BjbHN0cnMgJT4lIHB1cnJyOjptYXBfZGZyKC5pZCA9ICdDSUQnLCAuZiA9IH50aWJibGUobmNiaV9hY2Nlc3Npb24gPSAueEBzaWRzKSkKI3dyaXRlX2NzdihjbHVzdGVyX3NlcXMsZmlsZT0ndGFibGVzL2NsdXN0ZXJfc2Vxcy5jc3YnKQpgYGAKCk5vdyBsZXQncyBtYWtlIGFub3RoZXIgdGFibGUgd2l0aCBzZXF1ZW5jZXMgYW5kIHRoZWlyIGFzc29jaWF0ZWQgaW5mb3JtYXRpb24KYGBge3J9CiNzZXFfaW5mbyA9IHBoeWxvX2dlbnVzXzIwQHNxc0BzcXMgJT4lIHB1cnJyOjptYXBfZGYoLmYgPSB+dGliYmxlKG5jYmlfYWNjZXNzaW9uID0gLnhAaWQsIG5jYmlfdGF4aWQgPSAueEB0eGlkLCBuY2JpX29yZ2FuaXNtID0gLnhAb3JnbnNtLCBzZXFfbmFtZSA9IC54QGRmbG4sIHNlcSA9IHJhd1RvQ2hhcigueEBzcSkpKQojd3JpdGVfY3N2KHNlcV9pbmZvLGZpbGU9J3RhYmxlcy9zZXFfaW5mby5jc3YnKQpgYGAKCkZpbmFsbHksIGxldCdzIG1ha2UgYSB0YWJsZSB3aXRoIHRheG9ub21pYyBpbmZvcm1hdGlvbgoKYGBge3J9CiMgdGF4X2luZm8gPSBzZXFfaW5mbyAlPiUgCiMgICBzZWxlY3QobmNiaV90YXhpZCkgJT4lIAojICAgZGlzdGluY3QoKSAlPiUgCiMgICBtdXRhdGUobmNiaV90YXhfbmFtZSA9IGdldF90eF9zbG90KHBoeWxvX2dlbnVzXzIwLCAKIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB0eGlkID0gbmNiaV90YXhpZCwgCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2x0X25tID0gJ3Njbm0nKSwKIyAgICAgICAgICBuY2JpX2dlbnVzX2lkID0gZ2V0X3R4aWRzKHBoeWxvX2dlbnVzXzIwLCAKIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHR4aWRzID0gbmNiaV90YXhpZCwgCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBybmsgPSAnZ2VudXMnKSwKIyAgICAgICAgICBuY2JpX2dlbnVzX25hbWUgPSBnZXRfdHhfc2xvdChwaHlsb19nZW51c18yMCwgCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHhpZCA9IG5jYmlfZ2VudXNfaWQsIAojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHNsdF9ubSA9ICdzY25tJyksCiMgICAgICAgICAgbmNiaV9mYW1pbHlfaWQgPSBnZXRfdHhpZHMocGh5bG9fZ2VudXNfMjAsIAojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgdHhpZHMgPSBuY2JpX3RheGlkLCAKIyAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHJuayA9ICdmYW1pbHknKSwKIyAgICAgICAgICBuY2JpX2ZhbWlseV9uYW1lID0gZ2V0X3R4X3Nsb3QocGh5bG9fZ2VudXNfMjAsIAojICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHR4aWQgPSBuY2JpX2ZhbWlseV9pZCwgCiMgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2x0X25tID0gJ3Njbm0nKSwKIyAgICAgICAgICApCiMgd3JpdGVfY3N2KHRheF9pbmZvLGZpbGU9J3RhYmxlcy90YXhfaW5mby5jc3YnKQpgYGAKCiMgQ2hlY2tpbmcgZGF0YXNldAoKTm93IHdlIGhhdmUgZXZlcnl0aGluZyBvdXQgb2YgcGh5bG90YVIuIFdlIGNhbiBsb2FkIHRhYmxlcyBhbmQgd29yayBpbiBSc3R1ZGlvLgoKYGBge3J9CmNsdXN0ZXJfc2VxcyA9IHJlYWRfY3N2KGZpbGU9J3RhYmxlcy9jbHVzdGVyX3NlcXMuY3N2JykKY2x1c3Rlcl9zZXFzCmBgYAoKYGBge3J9CnNlcV9pbmZvID0gcmVhZF9jc3YoZmlsZT0ndGFibGVzL3NlcV9pbmZvLmNzdicpCnNlcV9pbmZvIApgYGAKCmBgYHtyfQp0YXhfaW5mbyA9IHJlYWRfY3N2KGZpbGU9J3RhYmxlcy90YXhfaW5mby5jc3YnKQp0YXhfaW5mbyAKYGBgCgpOb3cgbGV0J3MgcHVsbCBvdXIgdGFibGUgd2l0aCBzYW1wbGVzIGZvciB3aGljaCB3ZSBoYXZlIHBoZW5vdHlwZXMsIHNvIHdlIGNhbiBjb21iaW5lIGV2ZXJ5dGhpbmc6CgpgYGB7cn0KcGhlbm90eXBlX3RhYmxlID0gcmVhZF9jc3YoJ3VwZGF0ZWRfY3V0aWNsZV9wYXBlcnNfdGF4YS5jc3YnKQpwaGVub3R5cGVfdGFibGUKYGBgCgojIyBIb3cgbWFueSBnZW5lcmEgYXJlIHJlcHJlc2VudGVkIGJ5IGF0IGxlYXN0IG9uZSBzZXF1ZW5jZSBmb3IgZWFjaCBnZW5lPwoKRmlyc3QsIGxldCdzIGNyZWF0ZSB0aGUgdGFibGUgdGhhdCBJIG5lZWQgZm9yIHRoaXMuIFdlIHdpbGwgZmlyc3Qgc3VtbWFyaXplIHRoZSBkYXRhIHRoYXQgd2UgZ290IHdpdGggcGh5bG90YVIgYW5kIHRoZW4gYWRkIHRoZSBnZW5lcmEgbWlzc2luZyBmcm9tIHBoeWxvdGFSOgpgYGB7cn0Kb2NjX21hdHJpeCA9IGNsdXN0ZXJfc2VxcyAlPiUgCiAgbGVmdF9qb2luKGdlbmVfaWRlbnRpdGllcykgJT4lIAogIGxlZnRfam9pbih0YXhfaW5mbykgJT4lCiAgc2VsZWN0KG5jYmlfZ2VudXNfaWQsbmNiaV9nZW51c19uYW1lLG5jYmlfZmFtaWx5X25hbWUsZ2VuZSkgJT4lCiAgZGlzdGluY3QoKSAlPiUKICBtdXRhdGUobmNiaV9nZW51c19uYW1lID0gZmFjdG9yKG5jYmlfZ2VudXNfbmFtZSwgb3JkZXJlZCA9IFQpLAogICAgICAgICBnZW5lID0gZmN0X2luZnJlcShnZW5lLCBvcmRlcmVkID0gVCksCiAgICAgICAgIHNlcXVlbmNlZCA9IFQKICAgICAgICAgKSAKCm9jY19tYXRyaXggPSBleHBhbmQob2NjX21hdHJpeCwgbmNiaV9nZW51c19uYW1lLCBnZW5lKSAlPiUKICBsZWZ0X2pvaW4oc2VsZWN0KG9jY19tYXRyaXgsbmNiaV9nZW51c19uYW1lLCBnZW5lLCBzZXF1ZW5jZWQpKQoKbWlzc2luZ19nZW5lcmEgPSBwaGVub3R5cGVfdGFibGUgJT4lCiAgc2VsZWN0KG5jYmlfZ2VudXNfbmFtZSA9IHVwZGF0ZWRfZ2VudXMpICU+JQogIGZpbHRlcighKG5jYmlfZ2VudXNfbmFtZSAlaW4lIG9jY19tYXRyaXgkbmNiaV9nZW51c19uYW1lKSkgJT4lCiAgbmEuZXhjbHVkZSgpICU+JQogIHB1bGwobmNiaV9nZW51c19uYW1lKSAlPiUKICB1bmlxdWUoKQogIApvY2NfbWF0cml4ID0gZXhwYW5kLmdyaWQobmNiaV9nZW51c19uYW1lID0gbWlzc2luZ19nZW5lcmEsIAogICAgICAgICAgICAgICAgICAgICAgICAgZ2VuZSA9IHVuaXF1ZShvY2NfbWF0cml4JGdlbmUpKSAlPiUKICBtdXRhdGUoc2VxdWVuY2VkID0gRikgJT4lCiAgYmluZF9yb3dzKG11dGF0ZShvY2NfbWF0cml4LCBuY2JpX2dlbnVzX25hbWUgPSBhcy5jaGFyYWN0ZXIobmNiaV9nZW51c19uYW1lKSkpICU+JQogICBtdXRhdGUoc2VxdWVuY2VkID0gcmVwbGFjZV9uYShzZXF1ZW5jZWQsIEZBTFNFKSwKICAgICAgICAgbmNiaV9nZW51c19uYW1lID0gZmN0X3Jlb3JkZXIobmNiaV9nZW51c19uYW1lLHNlcXVlbmNlZCwuZnVuID0gc3VtLC5kZXNjID0gRikpIAoKb2NjX21hdHJpeAoKYGBgCgoKCk5vdyBsZXQncyBwbG90CmBgYHtyfQpwMSA9IGdncGxvdChvY2NfbWF0cml4KSArCiAgZ2VvbV90aWxlKGFlcyh4PWdlbmUseT1uY2JpX2dlbnVzX25hbWUsZmlsbD1zZXF1ZW5jZWQpKSArCiAgc2NhbGVfZmlsbF9icmV3ZXIodHlwZSA9ICdxdWFsJyxwYWxldHRlID0gJ1NwZWN0cmFsJykgKwogIHRoZW1lKGF4aXMudGV4dC55ID0gZWxlbWVudF90ZXh0KHNpemUgPSAzKSwKICAgICAgICBheGlzLnRleHQueCA9IGVsZW1lbnRfdGV4dChoanVzdCA9IDEsYW5nbGUgPSAzMCkpCnAxCmBgYApgYGB7cn0KcDIgPSBvY2NfbWF0cml4ICU+JQogIGdyb3VwX2J5KG5jYmlfZ2VudXNfbmFtZSkgJT4lCiAgc3VtbWFyaXplKHNlcXVlbmNlZCA9IG1lYW4oc2VxdWVuY2VkKSkgJT4lCiAgZ2dwbG90KCkgKwogIGdlb21fY29sKGFlcyh4PXNlcXVlbmNlZCx5PW5jYmlfZ2VudXNfbmFtZSkpICsKICBzY2FsZV94X2NvbnRpbnVvdXMobGFiZWxzID0gIHNjYWxlczo6cGVyY2VudCwgbGltaXRzID0gYygwLDEpKSArCiAgZ2d0aGVtZXM6OnRoZW1lX3R1ZnRlKCkgKwogIHRoZW1lKGF4aXMudGV4dC55ID0gZWxlbWVudF9ibGFuaygpLAogICAgICAgIGF4aXMudGl0bGUueSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBheGlzLmxpbmUueSA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBheGlzLnRpY2tzLnkgPSBlbGVtZW50X2JsYW5rKCkpIApwMgpgYGAKCmBgYHtyfQpwMyA9IG9jY19tYXRyaXggJT4lCiAgZ3JvdXBfYnkoZ2VuZSkgJT4lCiAgc3VtbWFyaXplKHNlcXVlbmNlZCA9IG1lYW4oc2VxdWVuY2VkKSkgJT4lCiAgZ2dwbG90KCkgKwogIGdlb21fY29sKGFlcyh4PWdlbmUseT1zZXF1ZW5jZWQpKSArCiAgc2NhbGVfeV9jb250aW51b3VzKGxhYmVscyA9ICBzY2FsZXM6OnBlcmNlbnQsIGxpbWl0cyA9IGMoMCwxKSkgKwogIGdndGhlbWVzOjp0aGVtZV90dWZ0ZSgpICsKICB0aGVtZShheGlzLnRleHQueCA9IGVsZW1lbnRfYmxhbmsoKSwKICAgICAgICBheGlzLnRpdGxlLnggPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgYXhpcy5saW5lLnggPSBlbGVtZW50X2JsYW5rKCksCiAgICAgICAgYXhpcy50aWNrcy54ID0gZWxlbWVudF9ibGFuaygpKQpwMwpgYGAKCk5vdyBsZXQncyBwdXQgZXZlcnl0aGluZyB0b2dldGhlci4KYGBge3J9CnAgPSBjb3dwbG90OjppbnNlcnRfeGF4aXNfZ3JvYihwMSxwMykKcCA9IGNvd3Bsb3Q6Omluc2VydF95YXhpc19ncm9iKHAscDIpCnAgPSBjb3dwbG90OjpnZ2RyYXcocCkKZ2dzYXZlKHBsb3QgPSBwLGZpbGVuYW1lID0gJ2dlbmVfb2NjdXBhbmN5LnBkZicsZGV2aWNlID0gJ3BkZicscGF0aCA9ICdwbG90cycsaGVpZ2h0ID0gMjUpCmBgYAoKV2hhdCBwcm9wb3J0aW9uIG9mIGdlbmVyYSBoYXZlIGF0IGxlYXN0IG9uZSBzZXF1ZW5jZSBmb3IgdGhlIDcgZ2VuZXMgd2l0aCBiZXR0ZXIgY292ZXJhZ2U/CmBgYHtyfQpiZXN0X2dlbmVzID0gb2NjX21hdHJpeCAlPiUKICBncm91cF9ieShnZW5lKSAlPiUKICBzdW1tYXJpemUoTj1zdW0oc2VxdWVuY2VkKSkKCmJlc3RfZ2VuZXMgCgpnZW5lc190b19rZWVwID0gYmVzdF9nZW5lcyRnZW5lWzE6N10KCm9jY19tYXRyaXggJT4lCiAgZmlsdGVyKGdlbmUgJWluJSBnZW5lc190b19rZWVwKSAlPiUKICBncm91cF9ieShuY2JpX2dlbnVzX25hbWUpICU+JQogIHN1bW1hcml6ZShhbnlfc2VxdWVuY2VkID0gYW55KHNlcXVlbmNlZCkpICU+JQogIGdyb3VwX2J5KGFueV9zZXF1ZW5jZWQpICU+JQogIHN1bW1hcml6ZShOID0gbigpKQpgYGAKCkxpc3QgZ2VuZXJhIHdpdGhvdXQgZGF0YS4KYGBge3J9Cm9jY19tYXRyaXggJT4lCiAgZmlsdGVyKGdlbmUgJWluJSBnZW5lc190b19rZWVwKSAlPiUKICBncm91cF9ieShuY2JpX2dlbnVzX25hbWUpICU+JQogIHN1bW1hcml6ZShhbnlfc2VxdWVuY2VkID0gYW55KHNlcXVlbmNlZCkpICU+JQogIGZpbHRlcighYW55X3NlcXVlbmNlZCkgJT4lCiAgcHVsbChuY2JpX2dlbnVzX25hbWUpCmBgYAoKCiMjIEV4cG9ydCBzZXF1ZW5jZXMgZm9yIGFsaWdubWVudAoKRmlyc3QsIGxldCdzIGNyZWF0ZSBsaXN0cyB3aXRoIHNlcXVlbmNlIGluZm9ybWF0aW9uLiBGb3Igc29tZSByZWFzb24sIHRoZSB0YXhvbm9taWMgaW5mb3JtYXRpb24gaW4gdGFibGUgY2x1c3Rlcl9zZXFzIGlzIGluY29ycmVjdCwgc28gbGV0J3MgcmVtb3ZlIGl0LgoKYGBge3J9CmdlbmVfbGlzdHMgPSBzZWxlY3QoY2x1c3Rlcl9zZXFzLC1uY2JpX3RheGlkKSAlPiUKICBsZWZ0X2pvaW4oZ2VuZV9pZGVudGl0aWVzKSAlPiUKICBmaWx0ZXIoZ2VuZSAlaW4lIGdlbmVzX3RvX2tlZXApICU+JQogIGxlZnRfam9pbihzZXFfaW5mbykgJT4lCiAgbGVmdF9qb2luKHRheF9pbmZvKSAlPiUKICBtdXRhdGUoZXhwb3J0X3NlcW5hbWUgPSBzdHJfYyhzdHJfYyhuY2JpX2dlbnVzX25hbWUsJ18nLG5jYmlfZ2VudXNfaWQpLAogICAgICAgICAgICAgICAgICAgICAgICAgc3RyX2Moc3RyX2MoJ2FjY2Vzc2lvbjonLG5jYmlfYWNjZXNzaW9uKSwKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHN0cl9jKCduY2JpX2ZhbWlseV9uYW1lOicsbmNiaV9mYW1pbHlfbmFtZSksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfYygnbmNiaV9mYW1pbHlfaWQ6JyxuY2JpX2ZhbWlseV9pZCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfYygncGh5bG90YVJfY2x1c3RlcjonLENJRCksCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBzdHJfYygnbmNiaV9zZXFuYW1lOicsc2VxX25hbWUpLAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgc2VwPSc7JyksCiAgICAgICAgICAgICAgICAgICAgICAgICBzZXAgPSAnfCcpCiAgICAgICAgICkgJT4lCiAgc2VsZWN0KGdlbmUsc2VxLGV4cG9ydF9zZXFuYW1lKSAlPiUKICBmaWx0ZXIoIWlzLm5hKHNlcSkpICU+JQogIG11dGF0ZShzZXFyZWNvcmQ9c3RyX2Moc3RyX2MoJz4nLGV4cG9ydF9zZXFuYW1lKSwKICAgICAgICAgICAgICAgICAgICAgICAgIHNlcSxzZXA9J1xuJykpCgpnZW5lX2xpc3RzID0gc3BsaXQoZ2VuZV9saXN0cywgZ2VuZV9saXN0cyRnZW5lKQoKZ2VuZV9saXN0cwpgYGAKCk5vdyBsZXQncyBjcmVhdGUgZmFzdGEgZmlsZXMKYGBge3J9CmRpci5jcmVhdGUoJ2V4cG9ydGVkX2Zhc3RhJykKcHVycnI6OndhbGsoZ2VuZV9saXN0cywgLmYgPSB+c3RyX2MoLngkc2VxcmVjb3JkLGNvbGxhcHNlPSdcbicpICU+JSB3cml0ZShmaWxlPWZpbGUucGF0aCgnZXhwb3J0ZWRfZmFzdGEnLHN0cl9jKHVuaXF1ZSgueCRnZW5lKSwnLmZhc3RhJykpKSkKYGBgCgoKCgoKCgoKCgoKCg==